####################################
# Bayesian Multiple Linear Regression
# Author: Michail Tsikerdekis
# Description: It builds a linear regression model, produces outputs for coefficients and R^2
####################################

# Clear Workspace
# NOTE(review): this removes every object (hidden ones included) from the
# environment the script runs in; kept because the script has always started
# from a clean slate.
rm(list = ls(all.names = TRUE))
# Seed the random number generator so the random draws made later in the
# script are reproducible. The previous line `set.seed=06192014` only created
# a variable called `set.seed`; it never seeded anything. 6192014 is the same
# numeric value as the original literal.
set.seed(6192014)
#setwd("~/Dropbox/BayesChapter") # if source files are not found point to the directory using this command

####################################
# Book data preparation
####################################
# generate.R is expected to create the `email` data set used below
# (presumably with a numeric responseTime column -- confirm in generate.R).
source("generate.R")
# Synthetic predictor: every response time is multiplied by its own
# uniform(0, 5) draw, one draw per observation.
email$technicalEfficacy <- with(
  email,
  responseTime * runif(length(responseTime), min = 0, max = 5)
)

####################################
# Data Input
# output is the response variable of the model
# predictor is the single predictor variable (the model is y = b0 + b1 * x)
# priors are defined inside the JAGS model further below
####################################
# Response (y) and predictor (x) vectors taken from the prepared data set.
output    <- email$responseTime
predictor <- email$technicalEfficacy

# Model parameters
# n.simu is the total iteration budget per chain; the first n.burnin of them
# are handed to the adaptation phase and the remainder are sampled.
n.simu <- 50000
n.burnin <- n.simu / 2
# Names of the model nodes whose samples are recorded.
par <- c("b0", "b1", "r.squared")

# Hypothesis testing parameters
# NOTE(review): `confint` and `par` share their names with stats::confint()
# and graphics::par(); calls to those functions still resolve correctly, but
# the names are kept only because the rest of the script refers to them.
confint <- 0.95 # Probability mass for interval estimates
compval <- 0    # The null value. For HDI in ROPE this can be virtually anything.
ropeRad <- 0.2  # ROPE radius in case ROPE is used to test hypotheses

# Other options
convergencetests <- TRUE # draw convergence diagnostics after sampling
plotting <- TRUE         # draw the posterior density plot

####################################
# Additional calculated variables
####################################
# Data handed to JAGS: response y, predictor x and the number of observations N.
D <- list(
  y = output,
  x = predictor,
  N = length(output)
)

####################################
# Load or Install Required Packages
####################################
# Packages attached for the rest of the script. The original list named
# "plyr" twice; the duplicate entry is dropped.
packages <- c("R.utils", "R2jags", "MCMCpack", "coda", "R2OpenBUGS", "plyr",
              "ggplot2", "BEST")
for (package in packages) {
  # Install only when the package is missing. requireNamespace() checks
  # availability without attaching, so each package is attached exactly once
  # by the library() call below (the original called require() twice).
  if (!requireNamespace(package, quietly = TRUE)) {
    install.packages(package)
  }
  # library() raises an error when the package is still unavailable, so a
  # failed installation stops the script here.
  library(package, character.only = TRUE)
}

####################################
# Setup JAGS model
####################################
# Model definition for a linear regression with one predictor,
# y[i] ~ Normal(b0 + b1 * x[i], sd = sigma), plus a per-draw R-squared.
# The function is never called as R code: its body is BUGS/JAGS model syntax
# that write.model() (below) turns into the model file "jags.txt".
# Data nodes expected from the data list: y, x and N.
jags.bin <- function() {
  # Likelihood: one normal observation per data row.
  for(i in 1:N) {
    # y is observed, f is the modeled mean. The second argument of dnorm in
    # JAGS is a precision; tau is defined below as 1 / sigma^2.
    y[i] ~ dnorm(f[i], tau)
    f[i] <- b0 + b1 * x[i]
  }
  
  # Priors
  tau <- 1/pow(sigma,2) # precision derived from the standard deviation
  sigma ~ dunif(0, 1000) # uniform prior on the residual standard deviation
  b0 ~ dnorm(0, 0.001) # intercept: mean 0, precision 0.001
  b1 ~ dnorm(0, 0.001) # slope: mean 0, precision 0.001
  
  # R-squared, recomputed for every posterior draw of b0 and b1
  y.mean <- mean(y[])
  for (i in 1:N) {
    ss.res.temp[i] <- y[i] - f[i] # residual of observation i
    ss.res[i] <- pow(ss.res.temp[i], 2) 
    ss.reg.temp[i] <- f[i] - y.mean # fitted value minus mean of y
    ss.reg[i] <- pow(ss.reg.temp[i], 2) 
    ss.tot.temp[i] <- y[i] - y.mean # observation minus mean of y
    ss.tot[i] <- pow(ss.tot.temp[i], 2)
  }
  # Regression sum of squares over total sum of squares. ss.res is computed
  # above but does not enter this ratio.
  # NOTE(review): because b0 and b1 are posterior draws rather than least-squares
  # estimates, this ratio is not guaranteed to stay within [0, 1] -- confirm
  # this is the intended definition.
  r.squared <- (sum(ss.reg[])) / (sum(ss.tot[]))
}
# Write the model body to "jags.txt" in the working directory
# (write.model presumably comes from R2OpenBUGS, loaded above).
write.model(jags.bin, "jags.txt")

####################################
# Building model using MCMC sampling
####################################
# Compile the model with four chains; n.burnin iterations are spent in the
# adaptation phase.
m.jags <- jags.model(
  file = "jags.txt",
  data = D,
  n.chains = 4,
  n.adapt = n.burnin,
  quiet = TRUE
)
# Sample the remaining n.simu - n.burnin iterations, recording the nodes
# named in `par`.
s <- coda.samples(
  model = m.jags,
  variable.names = par,
  n.iter = n.simu - n.burnin,
  quiet = TRUE
)
# Optional convergence diagnostics: Gelman-Rubin plot first, then the default
# coda plot of the samples.
if (convergencetests) {
  gelman.plot(s)
  plot(s)
}

# Convert the coda object to a data frame with one column per monitored node
# so chains and variables are easy to handle downstream.
df <- as.data.frame(as.matrix(s))

####################################
# Plotting posterior results
####################################
if (plotting) {
  # Posterior density of R-squared with a dashed red line at its posterior mean.
  # The mean is passed to geom_vline() as a constant. The earlier version mapped
  # it through aes() with data = df, which requests one identical line per
  # posterior draw instead of a single line.
  # NOTE(review): xlim(0, 1) drops any draws of r.squared outside [0, 1] from
  # the density -- confirm that is intended.
  # NOTE(review): `size` for lines is kept for compatibility with older
  # ggplot2 releases; newer releases prefer `linewidth`.
  print(
    ggplot(data = df, aes(x = r.squared)) +
      geom_density(alpha = .3) +
      geom_vline(xintercept = mean(df$r.squared), color = "red",
                 linetype = "dashed", size = 1) +
      theme_minimal() +
      xlim(0, 1) +
      ggtitle("Probability Distribution for R Squared") +
      xlab(bquote(paste("R"^2))) +
      ylab("Probability Density")
  )
}


####################################
# Means for Betas and R-squared
####################################
# For each monitored quantity: posterior mean, then the HDI as (lower, upper).
# The interval mass comes from `confint` (declared with the hypothesis-testing
# parameters, value .95) instead of a repeated literal, and hdi() is evaluated
# once per quantity rather than twice. Indexing with [1:2] keeps only the two
# named limits, matching the previous c(...[1], ...[2]) output.
mean(df$b0)
hdi(df$b0, credMass = confint)[1:2]
mean(df$b1)
hdi(df$b1, credMass = confint)[1:2]
mean(df$r.squared)
hdi(df$r.squared, credMass = confint)[1:2]

####################################
# Producing similar results using Zelig
####################################
# Cross-check: fit the same regression with Zelig's "normal.bayes" model,
# using the same burn-in and sampling iteration counts as the JAGS run.
library(Zelig)
# NOTE: this reuses the name `df`, replacing the posterior-sample data frame
# built earlier with the raw response/predictor data.
df <- data.frame(output = output, predictor = predictor)
z.out <- zelig(
  formula = output ~ predictor,
  model = "normal.bayes",
  data = df,
  burnin = n.burnin,
  mcmc = n.simu - n.burnin
)
summary(z.out)

